from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  # 逻辑回归
from sklearn.preprocessing import StandardScaler

"""
逻辑回归算法  使用随机梯度下降  获取 回归系数；因此需要数据标准化
"""

# Load the breast-cancer dataset bundled with scikit-learn
# (569 samples, 30 features).
cancer = load_breast_cancer()
print("keys", cancer.keys())

# print(cancer.DESCR)

# Separate the feature matrix from the label vector.
X = cancer.data
y = cancer.target

# print("标签", cancer.target_names)  # 0 = malignant, 1 = benign


# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# ratio the same in both partitions; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1, stratify=y
)

# Learn each feature column's mean and standard deviation from the training
# set only, then apply that same transformation to both partitions.
std = StandardScaler().fit(X_train)
X_train, X_test = std.transform(X_train), std.transform(X_test)

# Train a logistic-regression classifier with its default settings.
alg = LogisticRegression().fit(X_train, y_train)

# Accuracy on the held-out test set.
print("准确率", alg.score(X_test, y_test))
# print("测试集", y_test.sum(), len(y_test))

# Learned weights (one per feature) and the bias term.
print("系数", alg.coef_)
print("截距", alg.intercept_)

print("预测结果", alg.predict(X_test))

# Recompute the linear score by hand: the (114, 30) test matrix is broadcast
# against the (1, 30) coefficient row, the products are summed per sample and
# the intercept is added. This is the raw score, not a thresholded class
# label; decision_function is printed right after for comparison.
print("预测结果_自己实现", alg.intercept_ + (X_test * alg.coef_).sum(axis=1))
print("decision_function", alg.decision_function(X_test))
